{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "InteractiveShell.ast_node_interactivity = 'all'  # default is 'last_expr'\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "# Machine-specific checkout of the CameraTraps repo; edit this path for your environment.\n",
    "# Presumably this is what makes the data_management / visualization imports resolve - verify.\n",
    "CAMERA_TRAPS_REPO = '/data/home/marmot/camtrap/PyCharm/CameraTraps-benchmark'\n",
    "\n",
    "# Only append when missing, so re-running this cell does not keep growing sys.path.\n",
    "if CAMERA_TRAPS_REPO not in sys.path:\n",
    "    sys.path.append(CAMERA_TRAPS_REPO)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from collections import OrderedDict\n",
    "from copy import deepcopy\n",
    "import os\n",
    "from copy import deepcopy\n",
    "\n",
    "from PIL import Image\n",
    "\n",
    "from data_management.cct_json_utils import CameraTrapJsonUtils\n",
    "from visualization.visualization_utils import plot_stacked_bar_chart, render_db_bounding_boxes, resize_image"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Caltech bboxes cleanup\n",
    "\n",
    "The CCT bbox database doesn't have categories in the conventional format: they need to be the detection categories, not species, and \"empty\" is implied by an image not having a bbox entry. Also, the 20190409 version did not have Sara's original bboxes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the original CCT bbox database; the cells below read its 'info', 'categories',\n",
    "# 'images' and 'annotations' sections.\n",
    "# JSON text is UTF-8 by specification, so decode it explicitly instead of relying on\n",
    "# the locale's default encoding.\n",
    "with open('/beaver_disk/camtrap/caltech/original/CaltechCameraTrapsBboxes.json', encoding='utf-8') as f:\n",
    "    original_boxes = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "63102"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "{'date_captured': '2014-05-19 07:44:53',\n",
       " 'file_name': '5995adaf-23d2-11e8-a6a3-ec086b02610b.jpg',\n",
       " 'frame_num': 3,\n",
       " 'height': 1494,\n",
       " 'id': '5995adaf-23d2-11e8-a6a3-ec086b02610b',\n",
       " 'location': 37,\n",
       " 'rights_holder': 'Justin Brown',\n",
       " 'seq_id': '70181c0a-5567-11e8-b59c-dca9047ef277',\n",
       " 'seq_num_frames': 3,\n",
       " 'width': 2048}"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(original_boxes['images'])\n",
    "original_boxes['images'][1000]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "63102"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct image IDs that are referenced by at least one annotation.\n",
    "images_w_annotations = {anno['image_id'] for anno in original_boxes['annotations']}\n",
    "\n",
    "len(images_w_annotations)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The 'location' field needs to be a string, so cast it on every image entry (in place).\n",
    "# str() of a value that is already a str returns it unchanged, so re-running is harmless.\n",
    "for image_entry in original_boxes['images']:\n",
    "    image_entry.update(location=str(image_entry['location']))"
   ]
  },
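  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Every image in this DB has at least one annotation (63,102 images and 63,102 distinct annotated image IDs) - good.\n",
    "\n",
    "I don't think the cars have real bounding boxes yet: the sampled car annotations printed further down all start at the origin and appear to cover the entire frame, e.g. `[0, 0, 2048, 1536]`."
   ]
  },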
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'id': 6, 'name': 'bobcat'},\n",
       " {'id': 1, 'name': 'opossum'},\n",
       " {'id': 30, 'name': 'empty'},\n",
       " {'id': 9, 'name': 'coyote'},\n",
       " {'id': 3, 'name': 'raccoon'},\n",
       " {'id': 11, 'name': 'bird'},\n",
       " {'id': 8, 'name': 'dog'},\n",
       " {'id': 16, 'name': 'cat'},\n",
       " {'id': 5, 'name': 'squirrel'},\n",
       " {'id': 10, 'name': 'rabbit'},\n",
       " {'id': 7, 'name': 'skunk'},\n",
       " {'id': 14, 'name': 'lizard'},\n",
       " {'id': 99, 'name': 'rodent'},\n",
       " {'id': 21, 'name': 'badger'},\n",
       " {'id': 34, 'name': 'deer'},\n",
       " {'id': 37, 'name': 'cow'},\n",
       " {'id': 33, 'name': 'car'},\n",
       " {'id': 51, 'name': 'fox'},\n",
       " {'id': 39, 'name': 'pig'},\n",
       " {'id': 40, 'name': 'mountain_lion'},\n",
       " {'id': 66, 'name': 'bat'},\n",
       " {'id': 97, 'name': 'insect'}]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "original_boxes['categories']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "66406"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "{'bbox': [716.8, 629.76, 261.12, 215.03999999999996],\n",
       " 'category_id': 9,\n",
       " 'id': '2a5d2808-cbf1-11e8-819c-970a9450cdbc',\n",
       " 'image_id': '59f79a18-23d2-11e8-a6a3-ec086b02610b'}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(original_boxes['annotations'])\n",
    "original_boxes['annotations'][1000]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The four detector categories that replace the species-level categories.\n",
    "new_categories = [\n",
    "    {'name': 'animal', 'id': 1},\n",
    "    {'name': 'person', 'id': 2},\n",
    "    {'name': 'group', 'id': 3},\n",
    "    {'name': 'vehicle', 'id': 4},\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pull out the annotations for two categories that need special attention:\n",
    "# in original_boxes['categories'], id 33 is 'car' and id 30 is 'empty'.\n",
    "car_annos = [anno for anno in original_boxes['annotations'] if anno['category_id'] == 33]\n",
    "empty_annos = [anno for anno in original_boxes['annotations'] if anno['category_id'] == 30]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2615"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(car_annos)\n",
    "len(empty_annos)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2560, 1920]\n",
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2048, 1536]\n",
      "[0, 0, 2560, 1920]\n"
     ]
    }
   ],
   "source": [
    "cct_dir = '/beaver_disk/camtrap/caltech/cct_images'\n",
    "\n",
    "# Spot-check a sample of the car annotations: draw each bbox on its image and print\n",
    "# the bbox coordinates. The rendered image itself is not displayed.\n",
    "for car_anno in car_annos[100:120]:\n",
    "    image_path = os.path.join(cct_dir, car_anno['image_id'] + '.jpg')\n",
    "    # Open inside a context manager so every image file handle is closed before the\n",
    "    # next iteration instead of staying open until garbage collection.\n",
    "    with Image.open(image_path) as image:\n",
    "        # [1] is presumably the class label for the single box - verify against render_db_bounding_boxes.\n",
    "        render_db_bounding_boxes([car_anno['bbox']], [1], image, original_size=None, label_map=None, thickness=4)\n",
    "    print(car_anno['bbox'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "66406"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Map every original category id onto a detector category id:\n",
    "# 33 ('car') becomes 4 ('vehicle'); every other id becomes 1 ('animal').\n",
    "# Each annotation is deep-copied so original_boxes['annotations'] is left untouched.\n",
    "new_annos = []\n",
    "\n",
    "for source_anno in original_boxes['annotations']:\n",
    "    converted_anno = deepcopy(source_anno)\n",
    "    converted_anno['category_id'] = 4 if source_anno['category_id'] == 33 else 1\n",
    "    new_annos.append(converted_anno)\n",
    "\n",
    "len(new_annos)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'bbox': [883.2, 612.693359375, 320.8533593750001, 358.4],\n",
       "  'category_id': 1,\n",
       "  'id': '2a551c6c-cbf1-11e8-819c-970a9450cdbc',\n",
       "  'image_id': '59421138-23d2-11e8-a6a3-ec086b02610b'},\n",
       " {'bbox': [1448.5743761062913,\n",
       "   747.0000267028745,\n",
       "   484.5405578612941,\n",
       "   633.4358334541191],\n",
       "  'category_id': 1,\n",
       "  'id': '2a551d5c-cbf1-11e8-819c-970a9450cdbc',\n",
       "  'image_id': '59ffba4c-23d2-11e8-a6a3-ec086b02610b'}]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_annos[100:102]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'contributor': 'Sara Beery',\n",
       " 'date_created': '2018-10-09 18:51:47.161213',\n",
       " 'description': 'Bounding box annotations for 63,102 images from Caltech Camera Traps, covering all classes and locations.  Contains all annotations for CCT - 20, the 20-location dataset used in the ECCV18 paper \"Recognition in Terra Incognita,\" as well as additional annotations collected by MS AI for Earth',\n",
       " 'version': 'Caltech Camera Traps - Bboxes',\n",
       " 'year': 2018}"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "original_boxes['info']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'contributor': 'Sara Beery',\n",
       " 'date_created': '2018-10-09 18:51:47.161213',\n",
       " 'description': 'Bounding box annotations for 63,102 images from Caltech Camera Traps, covering all classes and locations.  Contains all annotations for CCT - 20, the 20-location dataset used in the ECCV18 paper \"Recognition in Terra Incognita,\" as well as additional annotations collected by MS AI for Earth. Version 20190904 has the categories normed to the four detector categories.',\n",
       " 'version': '20190904',\n",
       " 'year': 2018}"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Work on a copy so original_boxes['info'] keeps its original version and description.\n",
    "new_info = deepcopy(original_boxes['info'])\n",
    "\n",
    "# Stamp the new version and append a note about the category change to the description.\n",
    "new_info.update({\n",
    "    'version': '20190904',\n",
    "    'description': new_info['description'] + '. Version 20190904 has the categories normed to the four detector categories.',\n",
    "})\n",
    "new_info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assemble the cleaned-up database from the new info, categories and annotations,\n",
    "# reusing the image entries of the original database.\n",
    "unordered_db = dict(\n",
    "    info=new_info,\n",
    "    categories=new_categories,\n",
    "    images=original_boxes['images'],\n",
    "    annotations=new_annos,\n",
    ")\n",
    "\n",
    "# Presumably returns the database with its keys in a canonical order - see data_management.cct_json_utils.\n",
    "new_bboxes_db = CameraTrapJsonUtils.order_db_keys(unordered_db)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the new database next to the original file, pretty-printed with a 1-space indent.\n",
    "output_path = '/beaver_disk/camtrap/caltech/original/caltech_bboxes_20190904.json'\n",
    "\n",
    "with open(output_path, 'w') as out_file:\n",
    "    json.dump(new_bboxes_db, out_file, indent=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
